Load Data

library(ggplot2)
library(data.table)
library(scales)
library(RColorBrewer)
library(reticulate)
library(uwot)
## Loading required package: Matrix
library(gridExtra)
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: nlme
## Loading required package: rpart
## 
## spatstat 1.63-3       (nickname: 'Wet paint') 
## For an introduction to spatstat, type 'beginner'
## 
## Note: R version 3.6.0 (2019-04-26) is more than a year old; we strongly recommend upgrading to the latest version
## 
## Attaching package: 'spatstat'
## The following object is masked from 'package:scales':
## 
##     rescale
## The following object is masked from 'package:data.table':
## 
##     shift
library(ggdendro)
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
library(tidyverse)
## Registered S3 method overwritten by 'cli':
##   method     from    
##   print.boxx spatstat
## ── Attaching packages ───────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ✓ purrr   0.3.3
## ── Conflicts ──────────────────────────────── tidyverse_conflicts() ──
## x dplyr::between()    masks data.table::between()
## x readr::col_factor() masks scales::col_factor()
## x dplyr::collapse()   masks nlme::collapse()
## x dplyr::combine()    masks gridExtra::combine()
## x purrr::discard()    masks scales::discard()
## x tidyr::expand()     masks Matrix::expand()
## x dplyr::filter()     masks stats::filter()
## x dplyr::first()      masks data.table::first()
## x dplyr::lag()        masks stats::lag()
## x dplyr::last()       masks data.table::last()
## x tidyr::pack()       masks Matrix::pack()
## x purrr::transpose()  masks data.table::transpose()
## x tidyr::unpack()     masks Matrix::unpack()
use_condaenv(condaenv = 'r-reticulate', required = TRUE)

data_dir <- here::here('..','data')
load(file.path(data_dir, 'diff.sampled.Rdata'))
diff_dt <- fread(file.path(data_dir, 'diff.sampled.csv.gz'))

censor_negative_min = -1500
redo_umap_clustering = F
redo_umap_param_search = F

#only redo param search if doing clustering also:
redo_umap_clustering = redo_umap_clustering && redo_umap_clustering

# map day 0 to both plus and minus
map_day_0 <- function(df) {
  return(rbind(
    df[k562!='none'],
    df[k562=='none'][, k562 := 'cd19+'],
    df[k562=='none'][, k562 := 'cd19-']
  ))
}

UMAP

For now, skip straight to high dimensional plotting.

UMAP Functions

#grab channel map from cd4 data (cd4/8 are the same)
channel_map <- diff_opt_cd4$channel_map

# for now use all variables in the channel map except cd4/8
# removing zombie and cd_t (there is no var to differentiate cd4 and cd8 here)
umap_vars <- c(paste0(names(channel_map),'_t'))
umap_vars <- umap_vars[umap_vars != c('cd_t', 'zombie_t')]

sample_for_umap <- function(df, sample_size) {
  umap_dt <- diff_dt[!is.na(event_id), 
    .SD[event_id %in% sample(unique(event_id), 
      min(sample_size, length(unique(event_id))))],
    by=c('well', 'plate', 'day')]
}

cast_for_umap <- function(df) {
  #cast it so variables are columns and
  #subset sample umap data on variables
  umap_cast_dt <- dcast(df, 
    event_id + well + plate + day + t_type ~ variable, 
    value.var='value')
  return(umap_cast_dt)
}

scale_for_umap <- function(df, umap_vars, censor_negative_min) {
  umap_dt_in <- df[, ..umap_vars]
  
  # for points that are very negative, trim the to below the cutoff
  umap_dt_in[umap_dt_in < censor_negative_min] <- censor_negative_min
  
  # scale each input column
  umap_dt_in[ , 
    (names(umap_dt_in)) := lapply(.SD, scale), .SDcols = names(umap_dt_in)]
  
  return(umap_dt_in)
}
  
# check scales:
# ggplot(melt(cbind(umap_dt_in, id=1:nrow(umap_dt_in)), id.var='id'), 
#        aes(x=value)) +
#     geom_density() +
#     facet_grid(variable~.) +
#     scale_fill_distiller(palette='RdYlBu') +
#     theme_bw()

Choose UMAP Params

First, find parameters with a small sample.

# use umap canned functions
umap_dt <- sample_for_umap(diff_dt, 20)
umap_cast_dt <- cast_for_umap(umap_dt)
umap_dt_in <- scale_for_umap(umap_cast_dt, umap_vars, censor_negative_min)

### Run across parameter list

#umap parameter list 
min_dist_set <- c(0.0001, 0.005, 0.1)
n_neighbor_set <- c(3,6,15,30)
learning_rate_set <- c(0.1, 0.25, 0.5, 1) 


umap_params <- data.table(expand.grid(min_dist_set, n_neighbor_set, learning_rate_set))

# run umap via uwot library
umap_out <- umap_params[, {
  print(paste0('Running: ',.BY)); 
  as.data.table(umap(umap_dt_in, min_dist=as.numeric(.BY[1]),
    n_neighbors=as.numeric(.BY[2]), learning_rate=as.numeric(.BY[3]), verbose=F, n_threads=8, n_trees=100))
  }, by=names(umap_params)]

# rename the columns
names(umap_out) <- c('min_dist','n_neighbor', 'learning_rate', 'umap_1','umap_2')

# add the umap output to the input dt
umap_fcs_dt <- cbind(umap_cast_dt[, 1:length(names(umap_cast_dt))], umap_out)
umap_fcs_dt[, id := 1:.N]

umap_fcs_dt <- unique(umap_dt[, 
  .(donor, car, k562, t_type, day, well, event_id)])[
    umap_fcs_dt, on=.(t_type,well,day,event_id)]
color_points <- ggplot(umap_fcs_dt[car %in% c('CD28','41BB','KLRG1','BAFF-R','Zeta')], 
    aes(x=umap_1, y=umap_2, color=interaction(car, t_type))) +
  geom_point(size=0.1, alpha=0.1) + 
  guides(colour = guide_legend(override.aes = list(alpha=1, size=3))) +
  facet_grid(min_dist~n_neighbor+learning_rate) +
  theme_bw()

color_density <- ggplot(umap_fcs_dt, aes(x=umap_1, y=umap_2)) +
  geom_hex(bins = 70) +
  scale_fill_continuous(
    type = "viridis", limits=c(0,30), oob=scales::squish) +
  facet_grid(min_dist~n_neighbor+learning_rate) +
  theme_bw()

grid.arrange(color_points, color_density, ncol=2)

Choosing neighbors == 30 and min_dist == 1e-4.

Checking UMAP Param space

Scaling this to 200 events per well and checking CAR/condition separation.

chosen_dist=0.0001
chosen_n_neighbor=100 # default is 15
chosen_learning_rate=0.1

umap_dt <- diff_dt[!is.na(event_id), 
  .SD[event_id %in% sample(unique(event_id), 
    min(1000, length(unique(event_id))))], #modify minimum parameter for points per well
  by=c('well', 'plate', 'day')]

#grab channel map from cd4 data (cd4/8 are the same)
channel_map <- diff_opt_cd4$channel_map

# for now use all variables in the channel map
umap_vars <- c(paste0(names(channel_map),'_t'))

# removing cd_t and zombie_t from umap_vars
umap_vars <- umap_vars[umap_vars != c('cd_t', 'zombie_t')]

# filter by cd4/8
umap_cd4_dt <- umap_dt %>% filter(t_type=='cd4') 
umap_cd8_dt <- umap_dt %>% filter(t_type=='cd8') 

CD4 UMAP

umap_cd4_cast_dt <- dcast(umap_cd4_dt, 
  event_id + well + plate + day + t_type ~ variable, 
  value.var='value')

umap_cd4_dt_in <- umap_cd4_cast_dt[, umap_vars]

# scale each input column
umap_cd4_dt_in <- scale(umap_cd4_dt_in)

# run umap via uwot library (added learning rate and n_sgd_threads=1)
set.seed(123)
umap_cd4_out <- umap(umap_cd4_dt_in, min_dist=chosen_dist,
    n_neighbors=chosen_n_neighbor, learning_rate=chosen_learning_rate, verbose=T, n_threads=8, n_trees=100, n_sgd_threads=1,
    ret_nn=T)

# nearest neighbors in original space
nearest_neighbors_cd4 <- umap_cd4_out$nn$euclidean$idx
neighbor_dist_cd4 <- umap_cd4_out$nn$euclidean$dist
edge_weights_cd4 <- scales::rescale(-neighbor_dist_cd4, to=c(0,1))
edge_weights_cd4 <- edge_weights_cd4 - min(edge_weights_cd4)
umap_cd4_out <- data.table(umap_cd4_out$embedding)

#nearest neighbors in embedding
umap_nn <- cbind(1:nrow(umap_fcs_dt), 
    nnwhich(umap_fcs_dt[, list(umap_1, umap_2)], k=c(1:3)))
umap_nd <- cbind(rep(0, nrow(umap_fcs_dt)), 
    nndist(umap_fcs_dt[, list(umap_1, umap_2)], k=c(1:3)))
umap_ew <- scales::rescale(-umap_nd, to=c(0,1))
umap_ew <- umap_ew - min(umap_ew)

# rename the columns
names(umap_cd4_out) <- c('umap_1','umap_2')

# add the umap output to the input dt
umap_cd4_fcs_dt <- cbind(umap_cd4_cast_dt[, 1:length(names(umap_cd4_cast_dt))], umap_cd4_out)
umap_cd4_fcs_dt <- tibble::rowid_to_column(umap_cd4_fcs_dt, "id")

# add back the annotations
umap_cd4_fcs_dt <- umap_cd4_fcs_dt %>% left_join(distinct(umap_cd4_dt, donor, car, k562, t_type, day, well, event_id), by=c('t_type', 'day', 'well', 'event_id'))

CD8 UMAP

umap_cd8_cast_dt <- dcast(umap_cd8_dt, 
  event_id + well + plate + day + t_type ~ variable, 
  value.var='value')

umap_cd8_dt_in <- umap_cd8_cast_dt[, umap_vars]

# scale each input column
umap_cd8_dt_in <- scale(umap_cd8_dt_in)

# run umap via uwot library (added learning rate and n_sgd_threads=1)
set.seed(123)
umap_cd8_out <- umap(umap_cd8_dt_in, min_dist=chosen_dist,
    n_neighbors=chosen_n_neighbor, learning_rate=chosen_learning_rate, verbose=T, n_threads=8, n_trees=100, n_sgd_threads=1,
    ret_nn=T)

# nearest neighbors in original space
nearest_neighbors_cd8 <- umap_cd8_out$nn$euclidean$idx
neighbor_dist_cd8 <- umap_cd8_out$nn$euclidean$dist
edge_weights_cd8 <- scales::rescale(-neighbor_dist_cd8, to=c(0,1))
edge_weights_cd8 <- edge_weights_cd8 - min(edge_weights_cd8)
umap_cd8_out <- data.table(umap_cd8_out$embedding)

#nearest neighbors in embedding
umap_nn <- cbind(1:nrow(umap_fcs_dt), 
    nnwhich(umap_fcs_dt[, list(umap_1, umap_2)], k=c(1:3)))
umap_nd <- cbind(rep(0, nrow(umap_fcs_dt)), 
    nndist(umap_fcs_dt[, list(umap_1, umap_2)], k=c(1:3)))
umap_ew <- scales::rescale(-umap_nd, to=c(0,1))
umap_ew <- umap_ew - min(umap_ew)

# rename the columns
names(umap_cd8_out) <- c('umap_1','umap_2')

# add the umap output to the input dt
umap_cd8_fcs_dt <- cbind(umap_cd8_cast_dt[, 1:length(names(umap_cd8_cast_dt))], umap_cd8_out)
umap_cd8_fcs_dt <- tibble::rowid_to_column(umap_cd8_fcs_dt, "id")

# add back the annotations
umap_cd8_fcs_dt <- umap_cd8_fcs_dt %>% left_join(distinct(umap_cd8_dt, donor, car, k562, t_type, day, well, event_id), by=c('t_type', 'day', 'well', 'event_id'))

Clustering via leiden/reticulate

import leidenalg
import igraph as ig
import pandas as pd
import numpy as np
import math

# first, for CD4
edges_cd4 = []
n_neighbors_cd4 = r.nearest_neighbors_cd4.shape[1]
for i, nn_row in enumerate(r.nearest_neighbors_cd4):
    edge_weights_cd4 = r.edge_weights_cd4[i]
    for j in range(1, n_neighbors_cd4):
        edges_cd4.append((int(nn_row[0]), int(nn_row[j]), edge_weights_cd4[j]))
      
knn_graph_cd4 = ig.Graph.TupleList(edges_cd4, directed=True, weights=True)

part_cd4 = leidenalg.find_partition(knn_graph_cd4, 
    leidenalg.ModularityVertexPartition, weights='weight')
#part = leidenalg.find_partition(knn_graph, 
#    leidenalg.CPMVertexPartition, weights='weight', 
#    resolution_parameter=0.05)

cluster_membership_cd4 = [x for _,x in sorted(zip(knn_graph_cd4.vs['name'],part_cd4.membership))]

# again for CD8
edges_cd8 = []
n_neighbors_cd8 = r.nearest_neighbors_cd8.shape[1]
for i, nn_row in enumerate(r.nearest_neighbors_cd8):
    edge_weights_cd8 = r.edge_weights_cd8[i]
    for j in range(1, n_neighbors_cd8):
        edges_cd8.append((int(nn_row[0]), int(nn_row[j]), edge_weights_cd8[j]))
      
knn_graph_cd8 = ig.Graph.TupleList(edges_cd8, directed=True, weights=True)

part_cd8 = leidenalg.find_partition(knn_graph_cd8, 
    leidenalg.ModularityVertexPartition, weights='weight')
#part = leidenalg.find_partition(knn_graph, 
#    leidenalg.CPMVertexPartition, weights='weight', 
#    resolution_parameter=0.05)

cluster_membership_cd8 = [x for _,x in sorted(zip(knn_graph_cd8.vs['name'],part_cd8.membership))]
# convert into data tables
umap_cd4_fcs_dt <- as.data.table(umap_cd4_fcs_dt)
umap_cd8_fcs_dt <- as.data.table(umap_cd8_fcs_dt)

#add new column to umap 
umap_cd4_fcs_dt[, cluster := factor(py$cluster_membership_cd4+1)]
umap_cd8_fcs_dt[, cluster := factor(py$cluster_membership_cd8+1)]
# save the clustering data
fwrite(umap_cd4_fcs_dt, 
  compress='gzip', 
  file=file.path(here::here('..','data','diff.sampled.umap_cd4.csv.gz')))

fwrite(umap_cd8_fcs_dt, 
  compress='gzip', 
  file=file.path(here::here('..','data','diff.sampled.umap_cd8.csv.gz')))

# sync output back to s3
system('aws s3 sync \\
  --exclude "*" \\
  --include "*.Rdata" \\
  --include "*.csv*" \\
  ~/local-tcsl/data s3://roybal-tcsl//lenti_screen_compiled_data/data')

Cluster Analysis

One issue with these clusters is that they are not contiguous in the UMAP embedding: ### cd8 umap plot

umap_cd8_fcs_dt <- fread(
  file.path(
    here::here('..','data','diff.sampled.umap_cd8.csv.gz')))
umap_cd8_fcs_dt[, cluster := factor(cluster)]

umap_cd8_cluster_dt <- umap_cd8_fcs_dt[, list(
  mean_umap_1= mean(umap_1), 
  mean_umap_2= mean(umap_2), 
  size= .N), by=cluster]

# whole plot
ggplot() + 
  geom_point(data=umap_cd8_fcs_dt, 
    aes(x=umap_1, y=umap_2, color=cluster), size=0.2, alpha=0.2) +
  geom_label(data=umap_cd8_cluster_dt, aes(
    x=mean_umap_1, y=mean_umap_2,
    label=paste(cluster,size,sep='\n'), 
    color=cluster), alpha=0.3) +
  theme_minimal()

# individual plots
ggplot() + 
  geom_point(data=umap_cd8_fcs_dt, 
    aes(x=umap_1, y=umap_2, color=cluster), size=0.2, alpha=0.2) +
  geom_label(data=umap_cd8_cluster_dt, aes(
    x=mean_umap_1, y=mean_umap_2,
    label=paste(cluster,size,sep='\n'), 
    color=cluster), alpha=0.3) +
  facet_wrap(~cluster) +
  theme_bw()

### cd4 umap plot

umap_cd4_fcs_dt <- fread(
  file.path(
    here::here('..','data','diff.sampled.umap_cd4.csv.gz')))
umap_cd4_fcs_dt[, cluster := factor(cluster)]

umap_cd4_cluster_dt <- umap_cd4_fcs_dt[, list(
  mean_umap_1= mean(umap_1), 
  mean_umap_2= mean(umap_2), 
  size= .N), by=cluster]

# whole plot
ggplot() + 
  geom_point(data=umap_cd4_fcs_dt, 
    aes(x=umap_1, y=umap_2, color=cluster), size=0.2, alpha=0.2) +
  geom_label(data=umap_cd4_cluster_dt, aes(
    x=mean_umap_1, y=mean_umap_2,
    label=paste(cluster,size,sep='\n'), 
    color=cluster), alpha=0.3) +
  theme_minimal()

# individual plots
ggplot() + 
  geom_point(data=umap_cd4_fcs_dt, 
    aes(x=umap_1, y=umap_2, color=cluster), size=0.2, alpha=0.2) +
  geom_label(data=umap_cd4_cluster_dt, aes(
    x=mean_umap_1, y=mean_umap_2,
    label=paste(cluster,size,sep='\n'), 
    color=cluster), alpha=0.3) +
  facet_wrap(~cluster) +
  theme_bw()

color_plots <- function(umap_df) {
  cd4_colors = brewer.pal('Greens', n=9)[c(2,4,6,8,9)]
  cd8_colors = brewer.pal('Oranges', n=9)[c(2,4,6,8,9)]
  color_time <- ggplot(umap_df[][, cd19 := (k562=='cd19+')], 
      aes(x=umap_1, y=umap_2, color=interaction(day, t_type))) +
    geom_point(size=0.1, alpha=0.5) +
    facet_grid(car~cd19) +
    scale_color_manual(values=c(cd4_colors, cd8_colors)) +
    guides(colour = guide_legend(override.aes = list(alpha=1, size=3)))
  
  color_density <- ggplot(umap_df, aes(x=umap_1, y=umap_2)) +
    geom_hex(bins = 70) +
    facet_grid(car~cd19) +
    scale_fill_continuous(
      type = "viridis", limits=c(0,30), oob=scales::squish) +
    theme_bw()
  
  color_markers <- ggplot(
      melt(umap_df, measure.vars=umap_vars)[, 
        value.scaled := scale(value), by='variable'][,
          cd19 := (k562=='cd19+')], 
      aes(x=umap_1, y=umap_2, z=value.scaled)) +
    stat_summary_hex(bins = 70) +
    facet_grid(variable~cd19) +
    scale_fill_distiller(palette='RdYlBu', limits=c(-3, 3), oob=scales::squish) +
    theme_bw()
  
  grid.arrange(color_time, color_density, color_markers, ncol=3)
}

color_plots(umap_cd4_fcs_dt)

color_plots(umap_cd8_fcs_dt)

# add cd19 and cd4/8 rows to dendro

dendrogram <- function(umap_fcs_dt) {
  cluster_pct_vars <- c(paste0(
    names(channel_map),'_t'), 
    'FSC-A','SSC-A','donor','cd19')
  
  max_cluster <- max(as.numeric(umap_fcs_dt$cluster), na.rm=T)
  
  umap_var_melt_dt <-melt(
    umap_fcs_dt[!is.na(donor)], measure.vars= c(umap_vars,'donor', 'cd19'))
  
  # Use z scale to plot params
  umap_var_melt_dt[, value_scaled := scale(value), by=variable]
  param_cluster_pct_dt <- umap_var_melt_dt[
    !is.na(cluster) & variable %in% cluster_pct_vars,
    list(
      clust_mean = mean(value_scaled),
      clust_sd = sd(value_scaled)),
        by=c('variable','cluster')]
  
  # Make dendrogram
  cluster_mean_cast <- dcast(
    param_cluster_pct_dt, 
    variable ~ cluster, 
    value.var='clust_mean')
  
  cluster_dendro_m <- as.matrix(cluster_mean_cast[, -c(1)])
  rownames(cluster_dendro_m) <- unlist(cluster_mean_cast[,1])
  dendro_clusters <- as.dendrogram(hclust(d = dist(x = t(cluster_dendro_m))))
  dendro_vars <- as.dendrogram(hclust(d = dist(x = cluster_dendro_m)))
  
  # Create dendrogram plot
  dendro_vars_plot <- ggdendrogram(data = dendro_vars, rotate = TRUE) + 
    theme(axis.text.y = element_text(size = 6))
  dendro_clusters_plot <- ggdendrogram(data = dendro_clusters, rotate = TRUE) + 
    theme(axis.text.y = element_text(size = 6))
  
  # column order
  cluster_order <- order.dendrogram(dendro_clusters)
  cluster_names <- colnames(cluster_dendro_m[, cluster_order])
  param_cluster_pct_dt[, cluster:= factor(cluster, levels = cluster_names)]
  
  # row order
  var_order <- order.dendrogram(dendro_vars)
  var_names <- row.names(cluster_dendro_m[var_order, ])
  param_cluster_pct_dt$variable <- factor(
      param_cluster_pct_dt$variable,
      levels = var_names)
  
  # heatmap
  var_heatmap <- ggplot(param_cluster_pct_dt) + 
      geom_tile(aes(x=cluster, y=variable, fill=clust_mean), color='black') +
      scale_fill_distiller(palette='PiYG', limits=c(-3,3), oob=scales::squish) +
      theme_minimal()
  
  # col dendro
  dendro_data_col <- dendro_data(dendro_clusters, type = "rectangle")
  dendro_col <- axis_canvas(var_heatmap, axis = "x") + 
      geom_segment(data = segment(dendro_data_col), 
          aes(x = x, y = y, xend = xend, yend = yend))
  plot_dendroheat <- insert_xaxis_grob(var_heatmap, 
              dendro_col, grid::unit(0.2, "null"), position = "top")
  ggdraw(plot_dendroheat)
}

#cd8
dendrogram(umap_cd8_fcs_dt)
## Warning in melt.data.table(umap_fcs_dt[!is.na(donor)], measure.vars =
## c(umap_vars, : 'measure.vars' [ccr7_t, gfp_t, myc_t, cd45ra_t, ...] are not all
## of the same type. By order of hierarchy, the molten data value column will be of
## type 'double'. All measure variables not of type 'double' will be coerced too.
## Check DETAILS in ?melt.data.table for more on coercion.

#cd4
dendrogram(umap_cd4_fcs_dt)
## Warning in melt.data.table(umap_fcs_dt[!is.na(donor)], measure.vars =
## c(umap_vars, : 'measure.vars' [ccr7_t, gfp_t, myc_t, cd45ra_t, ...] are not all
## of the same type. By order of hierarchy, the molten data value column will be of
## type 'double'. All measure variables not of type 'double' will be coerced too.
## Check DETAILS in ?melt.data.table for more on coercion.

Percent of cells in day, by cluster

Percent of cells in cluster, by day

CAR differences per cluster

summed across cols

##### summed across rows

Bar charts showing how each CAR changes cluster by day

car_clusters_by_day <- function(umap_fcs_dt) {
  car_cluster_day_indiv_k562_bar_pct_dt <- map_day_0(umap_fcs_dt)[,
    data.table(table(car,cluster,k562,t_type,day))][,
      list(day, pct=N/sum(N)), by=c('car','k562','t_type','cluster')][,
        pct_delta := scale(pct), by=c('car','k562','t_type','cluster')]
  
  car_cluster_day_indiv_k562_bar_pct_dt[,
  cluster := factor(cluster, levels = cluster_names)]
  
  car_cluster_day_indiv_k562_bar_pct_dt$day <- factor(car_cluster_day_indiv_k562_bar_pct_dt$day, levels=c('0','6','15','24','33'))
  
  #filter for CD19+, only CARs > 10% in a cluster in a given day
  car_cluster_day_indiv_k562_bar_pct_dt %>% 
    filter(k562=='cd19+', pct > 0.10) %>%
    ggplot() + geom_bar(aes(x=cluster, y=pct), color='black', stat='identity') +
      facet_grid(car~day, scales='free_x', space='free_x') +
      labs(title='CAR enrichment (>10%)\nper cluster per day', y='CAR',
        subtitle='CAR enrichment in cluster\n(absolute %) (summed across rows by cluster number)')
}

#cd8
car_clusters_by_day(umap_cd8_fcs_dt)

#cd4
car_clusters_by_day(umap_cd4_fcs_dt)

Bar charts showing distribution of each CAR in clusters per day

car_clusters_by_square <- function(umap_fcs_dt) {
  car_cluster_day_indiv_k562_total_pct_dt <- map_day_0(umap_fcs_dt)[,
      data.table(table(car,cluster,k562,t_type,day))][,
        list(cluster, pct=N/sum(N)), by=c('car','k562','t_type','day')][,
          pct_delta := scale(pct), by=c('car','k562','t_type','day')]
    
    car_cluster_day_indiv_k562_total_pct_dt[,
    cluster := factor(cluster, levels = cluster_names)]
    
    car_cluster_day_indiv_k562_total_pct_dt$day <- factor(car_cluster_day_indiv_k562_total_pct_dt$day, levels=c('0','6','15','24','33'))
  #filter for  CD19+, only CARs > 10% in a cluster in a given day
  car_cluster_day_indiv_k562_total_pct_dt %>% 
    filter(k562=='cd19+', pct>.10) %>%
    ggplot() + geom_bar(aes(x=cluster, y=pct), color='black', stat='identity') +
      facet_grid(car~day, scales='free_x', space='free_x') +
      labs(title='CD8 CAR enrichment (>10%)\nper day per cluster', y='CAR',
        subtitle='CAR enrichment in cluster\n(z-score) (summed across CAR x day (each square))')
}

#cd8
car_clusters_by_square(umap_cd8_fcs_dt)

#cd4
car_clusters_by_square(umap_cd4_fcs_dt)